import argparse
import os
import json
import ast
from tqdm import tqdm
import sys
sys.path.append("./")
from prompt.guiOdessyPrompt import GUIODYSSEYHIGHPROMPT_FOROSATLAS, GUIODYSSEYLOWPROMPT_FOROSATLAS, GUIODYSSEYHIGHACTIONPREDICTPROMPT_FORUITARAS, GUIODYSSEYLOWACTIONPREDICTPROMPT_FORUITARAS, GUIOdessy_FROGUIR1
from utils.logging_utils import setup_logger_to_stdout
from utils.schema.GUI_OWL.common import pil_to_base64, message_translate
from preprocess_base import BasePreProcess
from utils.utils_odyssey.parameters import get_direction

logger = setup_logger_to_stdout()

def parse_args(args=None, namespace=None):
    """Parse the command-line options of the dataset-to-JSON conversion.

    Args:
        args: Optional list of argument strings. When ``None`` argparse
            falls back to ``sys.argv[1:]``.
        namespace: Optional object that receives the parsed attributes.

    Returns:
        The populated namespace with ``dataset_name``, ``dataset_type``,
        ``dataset_path``, ``model_name`` and ``save_path``.
    """
    parser = argparse.ArgumentParser(description='Origin Dataset To Json')
    parser.add_argument('--dataset_name', type=str, default="GUIOdyssey",
                        help='dataset name')
    parser.add_argument('--dataset_type', type=str, default='low', help='dataset type')
    parser.add_argument('--dataset_path', type=str, default="/data3/cpz/datasets/GUIOdyssey",
                        help='dataset path')
    parser.add_argument('--model_name', type=str, default="OS_ATLAS",
                        help='model name')
    parser.add_argument('--save_path', type=str, default="/Agent_ScanKit/datasets/json",
                        help='save path')
    # Forward both parameters so callers (and tests) can supply their own
    # argument list instead of always reading sys.argv.
    return parser.parse_args(args, namespace)


    
class GUI_OdesssyPreProcess(BasePreProcess):
    def __init__(self, dataset_type, path, dataset_name, save_path, model_name) -> None:
        """Initialise the base preprocessor and store the conversion settings.

        ``path``, ``dataset_name``, ``save_path`` and ``model_name`` are passed
        to ``BasePreProcess.__init__`` and also stored on the instance;
        ``dataset_type`` is stored on the instance only.
        """
        super().__init__(path, dataset_name, save_path, model_name)
        # ['CLICK', 'COMPLETE', 'TEXT', 'SCROLL', 'LONG_PRESS', 'INCOMPLETE']
        (
            self.path,
            self.dataset_name,
            self.save_path,
            self.model_name,
            self.dataset_type,
        ) = (path, dataset_name, save_path, model_name, dataset_type)

    
    def OS_ATLAS(self):
        """Build OS-ATLAS style records for every annotated step and save them.

        Every episode returned by ``self._get_test_index()`` is read from
        ``<path>/annotations`` and expanded into one record per step, each a
        deep copy of the template returned by ``BasePreProcess.OS_ATLAS``.
        The prompt comes from the low-level template when
        ``self.dataset_type == 'low'`` and from the high-level one otherwise.
        The records are written to
        ``<save_path>/<dataset_type>_random_<model_name lower-cased>.json``.
        """
        from copy import deepcopy

        template = super().OS_ATLAS()
        # CLICK annotations whose ``info`` is one of these keys are system
        # button presses rather than taps on a screen position.
        system_keys = {
            'KEY_HOME': "PRESS_HOME",
            'KEY_BACK': "PRESS_BACK",
            'KEY_APPSELECT': "PRESS_APPSELECT",
        }

        def actionMapping(action):
            """Translate one annotated step into its OS-ATLAS action string."""
            kind, info, raw_ps = action['action'], action['info'], action['ps']
            if kind == 'CLICK':
                if isinstance(info, str) and info in system_keys:
                    return system_keys[info]
                pt = ast.literal_eval(raw_ps)[0]
                return f"CLICK <point>[[{pt[0]}, {pt[1]}]]</point>"
            if kind == 'SCROLL':
                start = {'x': info[0][0], 'y': info[0][1]}
                end = {'x': info[1][0], 'y': info[1][1]}
                return f"SCROLL [{get_direction(start, end).upper()}]"
            if kind == 'TEXT':
                return f"TYPE [{info}]"
            if kind == 'LONG_PRESS':
                pt = ast.literal_eval(raw_ps)[0]
                return f"LONG_CLICK <point>[[{pt[0]}, {pt[1]}]]</point>"
            if kind == 'COMPLETE':
                return "COMPLETE"
            # Every other action type is reported as INCOMPLETE.
            return "INCOMPLETE"

        records = []
        for episode in tqdm(self._get_test_index()):
            metadata = self.readJson(os.path.join(self.path, 'annotations', episode))
            steps = metadata['steps']
            metadata['step_instructions'] = [s['low_level_instruction'] for s in steps]
            for idx, step in enumerate(steps):
                goal = metadata['task_info']['instruction']
                device = metadata['device_info']
                record = deepcopy(template)
                record['episode_id'] = metadata['episode_id']
                record['step_id'] = idx + 1
                record['goal'] = goal
                record['images'] = [os.path.join(self.path, 'screenshots/screenshots', step['screenshot'])]
                record['image_size'] = [[device['w'], device['h']]]
                record['label'] = f"action:\n{actionMapping(step)}"
                record['info'] = step['sam2_bbox']
                if self.dataset_type == 'low':
                    prompt = GUIODYSSEYLOWPROMPT_FOROSATLAS.replace("{finalGoal}", goal)
                    prompt = prompt.replace("{actionDesc}", step['low_level_instruction'])
                else:
                    prompt = GUIODYSSEYHIGHPROMPT_FOROSATLAS.replace("{finalGoal}", goal)
                # The history is the list of low-level instructions of all earlier steps.
                earlier = str(metadata['step_instructions'][:idx])
                record['messages'][0]['content'] = prompt.replace("{previousActions}", earlier)
                records.append(record)

        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
        out_name = f"{self.dataset_type}_random_{self.model_name.lower()}.json"
        self.saveJson(records, os.path.join(self.save_path, out_name))
        logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")
                  
    def UI_TARS(self):
        """Build UI-TARS chat records for every annotated step and save them.

        Every episode returned by ``self._get_test_index()`` is read from
        ``<path>/annotations`` and expanded into one record per step, each a
        deep copy of the template returned by ``BasePreProcess.UI_TARS``.
        For each step the conversation gets, in order: the instruction prompt
        (low- or high-level template depending on ``self.dataset_type``), up
        to four previous screenshot/response turns, and the current
        screenshot.  In ``'low'`` mode an assistant turn pre-filled with the
        step's thought is appended as well.  The records are written to
        ``<save_path>/<dataset_type>_random_<model_name lower-cased>.json``.
        """
        from copy import deepcopy

        sample = super().UI_TARS()
        screenshot_dir = os.path.join(self.path, 'screenshots/screenshots')
        # Model names containing "1.5" get click/long-press coordinates
        # rescaled with the screen size (raw value / 1000 * size); presumably
        # the annotations are 0-1000 normalised -- TODO confirm.
        rescale = "1.5" in self.model_name
        # System-button clicks.  All three use call syntax, matching the other
        # action labels produced here.
        system_calls = {
            'KEY_HOME': "press_home()",
            'KEY_BACK': "press_back()",
            'KEY_APPSELECT': "press_appselect()",
        }
        # The emitted scroll direction is the opposite of get_direction()'s result.
        flipped = {'left': 'right', 'right': 'left', 'up': 'down', 'down': 'up'}

        def format_point(ps, image_size):
            """Render a point as ``(x,y)`` in the coordinate space the model expects."""
            if rescale:
                return f"({ps[0]/1000*image_size[0]},{ps[1]/1000*image_size[1]})"
            return f"({ps[0]},{ps[1]})"

        def actionMapping(action, image_size):
            """Translate one annotated step into its UI-TARS action call string."""
            action_type = action['action']
            info = action['info']
            ps = action['ps']
            if action_type == 'CLICK':
                if isinstance(info, str) and info in system_calls:
                    return system_calls[info]
                return f"click(start_box='{format_point(ast.literal_eval(ps)[0], image_size)}')"
            if action_type == 'SCROLL':
                direction = get_direction({'x': info[0][0], 'y': info[0][1]},
                                          {'x': info[1][0], 'y': info[1][1]})
                return f"scroll(direction='{flipped[direction]}')"
            if action_type == 'TEXT':
                return f"type(content='{info}')"
            if action_type == 'LONG_PRESS':
                return f"long_press(start_box='{format_point(ast.literal_eval(ps)[0], image_size)}', time='')"
            if action_type == 'COMPLETE':
                return "finished()"
            return "incomplete()"

        def image_turn(image_path):
            """Return a user message that carries a single screenshot."""
            return {"role": "user", "content": [{"type": "image", "image": image_path}]}

        def text_turn(text):
            """Return an assistant message that carries a single text part."""
            return {"role": "assistant", "content": [{"type": "text", "text": text}]}

        def build_history(index, metadata, actions):
            """Return screenshot/response turns for at most the 4 steps before ``index``."""
            history = []
            for i in range(max(0, index - 4), index):
                history.append(image_turn(os.path.join(screenshot_dir, metadata['steps'][i]['screenshot'])))
                thought = metadata['step_instructions'][i]
                history.append(text_turn(f"Thought: {thought}\nAction: {actions[i]}"))
            return history

        low_level = self.dataset_type == 'low'
        prompt_template = (GUIODYSSEYLOWACTIONPREDICTPROMPT_FORUITARAS if low_level
                           else GUIODYSSEYHIGHACTIONPREDICTPROMPT_FORUITARAS)

        data = []
        for episode in tqdm(self._get_test_index()):
            metadata = self.readJson(os.path.join(self.path, 'annotations', episode))
            steps = metadata['steps']
            metadata['step_instructions'] = [step['low_level_instruction'] for step in steps]
            metadata['image_size'] = [metadata['device_info']['w'], metadata['device_info']['h']]
            action_traslate = [actionMapping(step, metadata['image_size']) for step in steps]

            for idx, step in enumerate(steps):
                thought = metadata['step_instructions'][idx]
                record = deepcopy(sample)
                record['episode_id'] = metadata['episode_id']
                record['step_id'] = idx + 1
                record['images'] = [os.path.join(screenshot_dir, step['screenshot'])]
                record['goal'] = metadata['task_info']['instruction']
                record['image_size'] = [metadata['image_size']]
                record['label'] = f"Thought: {thought}\nAction: {action_traslate[idx]}"
                record['info'] = step['sam2_bbox']
                record['messages'][1]['content'][0]['text'] = prompt_template.replace(
                    "{instruction}", metadata['task_info']['instruction'])
                if idx != 0:
                    record['messages'].extend(build_history(idx, metadata, action_traslate))
                record['messages'].append(image_turn(record['images'][0]))
                if low_level:
                    # Low-level mode pre-fills the thought so only the action is left to predict.
                    record['messages'].append(text_turn(f"Thought: {thought}\n"))
                data.append(record)

        os.makedirs(self.save_path, exist_ok=True)
        self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_random_"+self.model_name.lower()+'.json'))
        logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")


    def GUI_R1(self):
        """Build GUI-R1 records for every annotated step and save them as JSON.

        Every episode returned by ``self._get_test_index()`` is read from
        ``<path>/annotations`` and expanded into one record per step, each a
        deep copy of the template returned by ``BasePreProcess.GUI_R1``.  The
        label is the stringified action list wrapped in
        ``<think></think><answer>...</answer>``.  The prompt text is filled
        with the step's low-level instruction when
        ``self.dataset_type == 'low'`` and with the episode goal otherwise,
        plus the list of earlier low-level instructions as history.  The
        records are written to
        ``<save_path>/<dataset_type>_random_<model_name lower-cased>.json``.
        """
        from copy import deepcopy

        sample = super().GUI_R1()
        fixed_click_map = {
            'KEY_HOME': 'press_home',
            'KEY_BACK': 'press_back',
            'KEY_APPSELECT': 'press_recent'
        }
        # The emitted scroll direction is the opposite of get_direction()'s result.
        map_direction = {'left': 'right', 'right': 'left', 'up': 'down', 'down': 'up'}

        def to_pixels(ps, image_size):
            """Rescale an annotated point (raw value / 1000 * size) to integer pixels."""
            return [int(ps[0]/1000*image_size[0]), int(ps[1]/1000*image_size[1])]

        def actionMapping(action, image_size):
            """Translate one annotated step into the stringified GUI-R1 action list."""
            action_type = action['action']
            info = action['info']
            ps = action['ps']
            # Placeholders used by actions without a position / without text.
            point = [-100, -100]
            input_text = 'no input text'

            if action_type == 'CLICK':
                if isinstance(info, str) and info in fixed_click_map:
                    action_name = fixed_click_map[info]
                else:
                    point = to_pixels(ast.literal_eval(ps)[0], image_size)
                    action_name = 'click'
            elif action_type == 'LONG_PRESS':
                # Use the same pixel rescaling as CLICK so both positional
                # actions report their point in one coordinate space.
                point = to_pixels(ast.literal_eval(ps)[0], image_size)
                action_name = 'long_press'
            elif action_type == 'SCROLL':
                direction = get_direction({'x': info[0][0], 'y': info[0][1]},
                                          {'x': info[1][0], 'y': info[1][1]})
                input_text = map_direction[direction]
                action_name = 'scroll'
            elif action_type == 'TEXT':
                input_text = info
                action_name = 'type'
            elif action_type == "COMPLETE":
                action_name = 'complete'
            else:
                action_name = 'impossible'

            return str([{
                'action': action_name,
                'point': point,
                'input_text': input_text
            }])

        data = []
        for episode in tqdm(self._get_test_index()):
            metadata = self.readJson(os.path.join(self.path, 'annotations', episode))
            steps = metadata['steps']
            metadata['step_instructions'] = [step['low_level_instruction'] for step in steps]
            # One [w, h] entry per step, all taken from the episode's device info.
            metadata['image_size'] = [[metadata['device_info']['w'], metadata['device_info']['h']] for _ in steps]
            action_traslate = [actionMapping(step, image_size)
                               for step, image_size in zip(steps, metadata['image_size'])]
            for idx, step in enumerate(steps):
                record = deepcopy(sample)
                record['episode_id'] = metadata['episode_id']
                record['step_id'] = idx + 1
                record['goal'] = metadata['task_info']['instruction']
                record['images'] = [os.path.join(self.path, 'screenshots/screenshots', step['screenshot'])]
                record['image_size'] = [[metadata['device_info']['w'], metadata['device_info']['h']]]
                record['messages'][0]['content'][0]['image'] = record['images'][0]
                record['label'] = "<think></think><answer>"+action_traslate[idx]+"</answer>"
                record['info'] = step['sam2_bbox']
                instruction = step['low_level_instruction'] if self.dataset_type == 'low' else record['goal']
                text = '<image>\n' + GUIOdessy_FROGUIR1.replace("{text}", instruction)
                record['messages'][0]['content'][1]['text'] = text.replace(
                    "{history}", str(metadata['step_instructions'][:idx]))
                data.append(record)

        os.makedirs(self.save_path, exist_ok=True)
        self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_random_"+self.model_name.lower()+'.json'))
        logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")
      
    def Agent_CPM(self):
        """Build AgentCPM records for every annotated step and save them as JSON.

        Every episode returned by ``self._get_test_index()`` is read from
        ``<path>/annotations`` and expanded into one record per step, each a
        deep copy of the template returned by ``BasePreProcess.Agent_CPM``.
        The label is the stringified action dict.  The ``text_prompt``
        placeholder of the first message is replaced by the step's low-level
        instruction when ``self.dataset_type == 'low'`` and by the episode
        goal otherwise.  Every record carries the system prompt with the
        action schema substituted in.  The records are written to
        ``<save_path>/<dataset_type>_random_<model_name lower-cased>.json``.
        """
        from copy import deepcopy
        from prompt.guiOdessyPrompt import AGENT_CPM_SYSTEM_PROMPT

        sample = super().Agent_CPM()

        def actionMapping(action):
            """Translate one annotated step into the stringified AgentCPM action dict."""
            action_type = action['action']
            info = action['info']
            ps = action['ps']
            if action_type == 'CLICK':
                if isinstance(info, str) and info in ('KEY_HOME', 'KEY_BACK', 'KEY_APPSELECT'):
                    # 'KEY_HOME' -> 'HOME', 'KEY_BACK' -> 'BACK', ...
                    nav_type = info.split("_")[-1]
                    return str({"thought": "", "PRESS": f"{nav_type}"})
                ps = ast.literal_eval(ps)[0]
                return str({"thought": "", "POINT": [ps[0], ps[1]]})
            if action_type == 'SCROLL':
                direction = get_direction({'x': info[0][0], 'y': info[0][1]},
                                          {'x': info[1][0], 'y': info[1][1]})
                return str({"thought": "", "POINT": [-100, -100], "to": direction})
            if action_type == 'TEXT':
                return str({"thought": "", "TYPE": info})
            if action_type == 'LONG_PRESS':
                ps = ast.literal_eval(ps)[0]
                return str({"thought": "", "POINT": [ps[0], ps[1]], "duration": 1000})
            if action_type == 'COMPLETE':
                return str({"thought": "", 'STATUS': 'finish'})
            return str({"thought": "", 'STATUS': 'impossible'})

        # Read the schema through a context manager so the file handle is
        # closed deterministically.
        with open('/Agent_ScanKit/utils/schema/agentCPMSchema.json', encoding="utf-8") as schema_file:
            action_schema = json.load(schema_file)
        # Insert the "required" entry as the 4th key of the schema.
        items = list(action_schema.items())
        items.insert(3, ("required", ["thought"]))
        action_schema = dict(items)
        system_prompt = AGENT_CPM_SYSTEM_PROMPT.replace("ACTION_SCHEMA", str(action_schema))

        data = []
        for episode in tqdm(self._get_test_index()):
            metadata = self.readJson(os.path.join(self.path, 'annotations', episode))
            steps = metadata['steps']
            metadata['step_instructions'] = [step['low_level_instruction'] for step in steps]
            action_traslate = [actionMapping(step) for step in steps]

            for idx, step in enumerate(steps):
                record = deepcopy(sample)
                record['episode_id'] = metadata['episode_id']
                record['step_id'] = idx + 1
                record['goal'] = metadata['task_info']['instruction']
                record['images'] = [os.path.join(self.path, 'screenshots/screenshots', step['screenshot'])]
                record['image_size'] = [[metadata['device_info']['w'], metadata['device_info']['h']]]
                record['label'] = action_traslate[idx]
                record['info'] = step['sam2_bbox']
                if self.dataset_type == 'low':
                    instruction = metadata['step_instructions'][idx]
                else:
                    instruction = metadata['task_info']['instruction']
                record['messages'][0]['content'][0] = record['messages'][0]['content'][0].replace(
                    "text_prompt", instruction)
                record['system_prompt'] = system_prompt
                data.append(record)

        os.makedirs(self.save_path, exist_ok=True)
        self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_random_"+self.model_name.lower()+'.json'))
        logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")

    def OS_Genesis(self):
        """Build OS-Genesis records for every annotated step and save them as JSON.

        Every episode returned by ``self._get_test_index()`` is read from
        ``<path>/annotations`` and expanded into one record per step, each a
        deep copy of the template returned by ``BasePreProcess.OS_Genesis``.
        The label is ``"Low-level thought: <instruction> action: <payload>"``
        and ``record['question']`` is formatted from the low- or high-level
        prompt depending on ``self.dataset_type``.  The records are written to
        ``<save_path>/<dataset_type>_random_<model_name lower-cased>.json``.
        """
        from copy import deepcopy
        from prompt.guiOdessyPrompt import OS_GENESIS_HIGH_PROMPT, OS_GENESIS_LOW_PROMPT

        template = super().OS_Genesis()
        system_keys = ['KEY_HOME', 'KEY_BACK', 'KEY_APPSELECT']

        def actionMapping(action, thought):
            """Translate one annotated step into its OS-Genesis label string."""
            kind = action['action']
            info = action['info']
            raw_ps = action['ps']
            head = f"Low-level thought: {thought} action: "
            if kind == 'CLICK':
                if type(info) == str and info in system_keys:
                    # 'KEY_HOME' -> 'home', 'KEY_BACK' -> 'back', ...
                    suffix = info.split("_")[-1].lower()
                    return f"{head}{{'action_type': 'navigate_{suffix}'}}"
                pt = ast.literal_eval(raw_ps)[0]
                return f'{head}{{"action_type": "click", "x": {pt[0]}, "y": {pt[1]}}}'
            if kind == 'SCROLL':
                start = {'x': info[0][0], 'y': info[0][1]}
                end = {'x': info[1][0], 'y': info[1][1]}
                return f'{head}{{"action_type": "scroll", "direction": "{get_direction(start, end)}"}}'
            if kind == 'TEXT':
                payload = json.dumps({
                    "action_type": "type",
                    "text": info,
                    "x": -100,
                    "y": -100
                })
                return f"{head}{payload}"
            if kind == 'COMPLETE':
                return f"{head}{{'action_type': 'stop'}}"
            if kind == 'LONG_PRESS':
                pt = ast.literal_eval(raw_ps)[0]
                return f'{head}{{"action_type": "long_press", "x": {pt[0]}, "y": {pt[1]}}}'
            return f"{head}{{'action_type': 'impossible'}}"

        records = []
        for episode in tqdm(self._get_test_index()):
            metadata = self.readJson(os.path.join(self.path, 'annotations', episode))
            steps = metadata['steps']
            metadata['step_instructions'] = [s['low_level_instruction'] for s in steps]
            labels = [actionMapping(s, text) for s, text in zip(steps, metadata['step_instructions'])]

            for idx, step in enumerate(steps):
                record = deepcopy(template)
                record['episode_id'] = metadata['episode_id']
                record['step_id'] = idx + 1
                record['goal'] = metadata['task_info']['instruction']
                record['images'] = [os.path.join(self.path, 'screenshots/screenshots', step['screenshot'])]
                record['image_size'] = [[metadata['device_info']['w'], metadata['device_info']['h']]]
                record['label'] = labels[idx]
                record['info'] = step['sam2_bbox']
                # History entries are numbered from 0.
                earlier = [f"Step {n}:{text}" for n, text in enumerate(metadata['step_instructions'][:idx])]
                if self.dataset_type == 'low':
                    record['question'] = OS_GENESIS_LOW_PROMPT.format(
                        instruction=record['goal'],
                        history='\n'.join(earlier),
                        a11y_tree='',
                        low_level_thought=metadata['step_instructions'][idx],
                    )
                else:
                    # NOTE(review): the high-level prompt receives the history
                    # as a list, whereas the low-level one gets a newline-joined
                    # string -- confirm this difference is intended.
                    record['question'] = OS_GENESIS_HIGH_PROMPT.format(
                        instruction=record['goal'],
                        history=earlier,
                        a11y_tree='',
                    )
                records.append(record)

        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
        out_name = f"{self.dataset_type}_random_{self.model_name.lower()}.json"
        self.saveJson(records, os.path.join(self.save_path, out_name))
        logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")
    

    def Aguvis(self):
        """Build Aguvis records for every annotated step and save them as JSON.

        Every episode returned by ``self._get_test_index()`` is read from
        ``<path>/annotations`` and expanded into one record per step, each a
        deep copy of the template returned by ``BasePreProcess.Aguvis``.  The
        label is an ``assistantos`` line followed by a ``pyautogui``/``mobile``
        call.  The user text is formatted from ``user_instruction``; the
        low-level instruction is only filled in when
        ``self.dataset_type == 'low'``.  The records are written to
        ``<save_path>/<dataset_type>_random_<model_name lower-cased>.json``.
        """
        from copy import deepcopy
        from utils.schema.aguvisConstants import user_instruction

        template = super().Aguvis()
        system_keys = ['KEY_HOME', 'KEY_BACK', 'KEY_APPSELECT']
        # Scroll call per direction; any direction not listed here falls back
        # to the negative vertical page scroll.
        scroll_calls = {
            'left': "assistantos\npyautogui.hscroll(page=-0.1)",
            'right': "assistantos\npyautogui.hscroll(page=0.1)",
            'up': "assistantos\npyautogui.scroll(page=0.1)",
        }
        scroll_fallback = "assistantos\npyautogui.scroll(page=-0.1)"

        def actionMapping(action):
            """Translate one annotated step into its Aguvis label string."""
            kind = action['action']
            info = action['info']
            raw_ps = action['ps']

            if kind == 'CLICK':
                if type(info) == str and info in system_keys:
                    # 'KEY_HOME' -> 'home', 'KEY_BACK' -> 'back', ...
                    button = info.split("_")[-1].lower()
                    return f"assistantos\nmobile.{button}()"
                pt = ast.literal_eval(raw_ps)[0]
                return f"assistantos\npyautogui.click(x={pt[0]/1000}, y={pt[1]/1000})"
            if kind == 'SCROLL':
                start = {'x': info[0][0], 'y': info[0][1]}
                end = {'x': info[1][0], 'y': info[1][1]}
                return scroll_calls.get(get_direction(start, end), scroll_fallback)
            if kind == 'TEXT':
                return f"assistantos\npyautogui.write(message='{info}')"
            if kind == 'LONG_PRESS':
                pt = ast.literal_eval(raw_ps)[0]
                return f"assistantos\nmobile.long_press(x={pt[0]/1000}, y={pt[1]/1000})"
            if kind == 'COMPLETE':
                return "assistantos\nmobile.terminate(status='success')"
            return "assistantos\nmobile.terminate(status='impossible')"

        records = []
        for episode in tqdm(self._get_test_index()):
            metadata = self.readJson(os.path.join(self.path, 'annotations', episode))
            steps = metadata['steps']
            metadata['step_instructions'] = [s['low_level_instruction'] for s in steps]
            labels = [actionMapping(s) for s in steps]
            metadata['image_size'] = [metadata['device_info']['w'], metadata['device_info']['h']]
            for idx, step in enumerate(steps):
                record = deepcopy(template)
                record['episode_id'] = metadata['episode_id']
                record['step_id'] = idx + 1
                record['goal'] = metadata['task_info']['instruction']
                record['images'] = [os.path.join(self.path, 'screenshots/screenshots', step['screenshot'])]
                record['image_size'] = [[metadata['device_info']['w'], metadata['device_info']['h']]]
                record['label'] = labels[idx]
                record['info'] = step['sam2_bbox']
                # History entries are numbered from 0 and passed on as a list.
                earlier = [f"Step{n}:{text}" for n, text in enumerate(metadata['step_instructions'][:idx])]
                low_level = self.dataset_type == 'low'
                record['messages']['content'][1]['text'] = user_instruction.format(
                    overall_goal=record['goal'],
                    previous_actions=earlier,
                    low_level_instruction=metadata['step_instructions'][idx] if low_level else "",
                )
                record['is_low_level_instruction'] = low_level
                if low_level:
                    record['low_level_instruction'] = metadata['step_instructions'][idx]
                record['mode'] = 'force-plan'
                records.append(record)

        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
        out_name = f"{self.dataset_type}_random_{self.model_name.lower()}.json"
        self.saveJson(records, os.path.join(self.save_path, out_name))
        logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")


   
    def GUI_Odyssey(self):
        """Build GUI-Odyssey records for every annotated step and save them as JSON.

        Every episode returned by ``self._get_test_index()`` is read from
        ``<path>/annotations`` and expanded into one record per step, each a
        deep copy of the template returned by ``BasePreProcess.GUI_Odyssey``.
        The template's ``question`` is formatted with the step's low-level
        instruction when ``"low"`` occurs in ``self.dataset_type`` and with
        the episode goal otherwise, then extended with the previous-screenshot
        marker and up to four previous actions.

        Two files are written: the records to
        ``<save_path>/<dataset_type>_random_<model_name lower-cased>.json`` and
        a mapping from each screenshot path to the screenshots of the earlier
        steps of its episode to
        ``/Agent_ScanKit/utils/utils_odyssey/his_index.json``.
        """
        from copy import deepcopy

        sample = super().GUI_Odyssey()

        def actionMapping(action):
            """Translate one annotated step into its GUI-Odyssey command string."""
            action_type = action['action']
            info = action['info']
            ps = action['ps']
            if action_type == 'CLICK':
                if isinstance(info, str) and info in ('KEY_HOME', 'KEY_BACK', 'KEY_APPSELECT'):
                    # 'KEY_HOME' -> 'PRESS_HOME', 'KEY_BACK' -> 'PRESS_BACK', ...
                    nav_type = info.split("_")[-1]
                    return f'PRESS_{nav_type}'
                ps = ast.literal_eval(ps)[0]
                return f'CLICK: ({ps[0]}, {ps[1]})'
            if action_type == 'SCROLL':
                direction = get_direction({'x': info[0][0], 'y': info[0][1]},
                                          {'x': info[1][0], 'y': info[1][1]})
                return f'SCROLL: {direction.upper()}'
            if action_type == 'TEXT':
                return f'TYPE: {info}'
            if action_type == 'LONG_PRESS':
                ps = ast.literal_eval(ps)[0]
                return f'LONG_PRESS: ({ps[0]}, {ps[1]})'
            if action_type == 'COMPLETE':
                return 'COMPLETE'
            return "IMPOSSIBLE"

        data = []
        hit_index = {}
        for episode in tqdm(self._get_test_index()):
            metadata = self.readJson(os.path.join(self.path, 'annotations', episode))
            steps = metadata['steps']
            metadata['step_instructions'] = [step['low_level_instruction'] for step in steps]

            action_traslate = [actionMapping(action) for action in steps]
            previous_action_history: list = []
            previous_screenshot_history: list = []
            for idx, step in enumerate(steps):
                record = deepcopy(sample)
                record['episode_id'] = metadata['episode_id']
                record['step_id'] = idx + 1
                record['goal'] = metadata['task_info']['instruction']
                img = os.path.join(self.path, 'screenshots/screenshots', step['screenshot'])
                record['images'] = [img]
                # Slice to store a snapshot; the list keeps growing below.
                hit_index[f"{img}"] = previous_screenshot_history[:idx]
                record['image_size'] = [[metadata['device_info']['w'], metadata['device_info']['h']]]
                record['label'] = action_traslate[idx]
                record['info'] = step['sam2_bbox']

                if "low" in self.dataset_type:
                    instruction = metadata['step_instructions'][idx]
                else:
                    instruction = record['goal']
                question = record['question'].format(
                    instruction=instruction,
                    image_path=record['images'][0]
                )
                if idx > 0:
                    his_img = f'\nPrevious screenshots: <img>image-history: {img}</img>'
                    # A separate counter is used for the numbering so the step
                    # index ``idx`` stays intact for the history append below.
                    his_str = '\nPrevious Actions: ' + ''.join(
                        f"{number}. {past_action}\n"
                        for number, past_action in enumerate(previous_action_history[-4:], start=1)
                    )
                    # Appended once, after the full action list has been built.
                    question = f"{question}{his_img}{his_str}"
                else:
                    question += '\nPrevious screenshots: None'
                    question += '\nPrevious Actions: None'
                question += '\nProvide the command-style action directly.'
                record['question'] = question
                previous_action_history.append(action_traslate[idx])
                previous_screenshot_history.append(img)
                data.append(record)

        os.makedirs(self.save_path, exist_ok=True)
        self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_random_"+self.model_name.lower()+'.json'))
        self.saveJson(hit_index, os.path.join("/Agent_ScanKit/utils/utils_odyssey", "his_index.json"))
        logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")



    # TODO
    def GUI_OWL(self):
        """Convert the GUIOdyssey test split into GUI_OWL records and save them as JSON.

        One record is produced per step of every episode returned by
        ``self._get_test_index()``. Each record starts as a deep copy of the
        ``sample`` template returned by ``super().GUI_OWL()`` and is filled with
        the episode/step identifiers, the task goal, the screenshot path, the
        device size, the ground-truth label, the ``sam2_bbox`` annotation and
        the chat messages. The list of records is written to
        ``<save_path>/<dataset_type>_random_<model_name lowercased>.json``.

        When ``self.dataset_type == 'low'`` the user prompt carries the step's
        low-level instruction; otherwise it carries the episode goal. In both
        cases the low-level instructions of the earlier steps are passed as
        history.

        Returns:
            None. Output is written through ``self.saveJson``.
        """
        # Imported once per call instead of once per step.
        from copy import deepcopy

        build_system_messages, getResizedImage, build_user_messages, sample = super().GUI_OWL()

        # Every label shares this envelope; only the "arguments" object differs.
        label_head = '<thinking>\n""\n</thinking>\n<tool_call>\n{"name": "mobile_use", "arguments": '
        label_tail = '}\n</tool_call>\n<conclusion>\n""\n</conclusion>'
        # CLICK steps whose ``info`` is one of these keys are system-button presses.
        system_buttons = {'KEY_HOME': 'Home', 'KEY_BACK': 'Back', 'KEY_APPSELECT': 'Appselect'}

        def actionMapping(action):
            """Render one annotated step as the GUI_OWL ground-truth label string."""
            action_type = action['action']
            info = action['info']
            ps = action['ps']
            if action_type == 'CLICK':
                # ``info`` may be a non-string (unhashable) value for ordinary
                # clicks, so only strings are looked up in the button table.
                button = system_buttons.get(info) if isinstance(info, str) else None
                if button is not None:
                    arguments = f'{{"action": "system_button", "button": "{button}"}}'
                else:
                    # ``ps`` is a string literal; its first element is used as the point.
                    point = ast.literal_eval(ps)[0]
                    arguments = f'{{"action": "click", "coordinate": [{point[0]}, {point[1]}]}}'
            elif action_type == 'SCROLL':
                # ``info`` is indexed as [start_point, end_point].
                start, end = info[0], info[1]
                arguments = (
                    f'{{"action": "swipe", "coordinate": [{start[0]}, {start[1]}], '
                    f'"coordinate2": [{end[0]}, {end[1]}]}}'
                )
            elif action_type == 'TEXT':
                # json.dumps escapes quotes, backslashes and control characters so the
                # tool call stays valid JSON; plain text renders exactly as before.
                text_literal = json.dumps(str(info), ensure_ascii=False)
                arguments = f'{{"action": "type", "text": {text_literal}}}'
            elif action_type == 'LONG_PRESS':
                point = ast.literal_eval(ps)[0]
                arguments = (
                    f'{{"action": "long_press", "coordinate": [{point[0]}, {point[1]}], "time": 2}}'
                )
            elif action_type == 'COMPLETE':
                arguments = '{"action": "terminate", "status": "success"}'
            else:
                # Any other action type is reported as a failed termination.
                arguments = '{"action": "terminate", "status": "failure"}'
            return f"{label_head}{arguments}{label_tail}"

        data = []
        for episode in tqdm(self._get_test_index()):
            metadata = self.readJson(os.path.join(self.path, 'annotations', episode))
            step_instructions = [step['low_level_instruction'] for step in metadata['steps']]
            metadata['step_instructions'] = step_instructions
            for idx, step in enumerate(metadata['steps']):
                record = deepcopy(sample)
                record['episode_id'] = metadata['episode_id']
                record['step_id'] = idx + 1  # step ids are 1-based
                record['goal'] = metadata['task_info']['instruction']
                record['images'] = [os.path.join(self.path, 'screenshots/screenshots', step['screenshot'])]
                record['image_size'] = [[metadata['device_info']['w'], metadata['device_info']['h']]]
                record['label'] = actionMapping(step)
                record['bbox'] = step['sam2_bbox']
                # Only the returned object's height/width are used here, to build
                # the system prompt (presumably the resized screenshot - confirm
                # against BasePreProcess).
                dummy_image = getResizedImage(record['images'][0])
                system_messages = build_system_messages(dummy_image.height, dummy_image.width)

                if self.dataset_type == 'low':
                    instruction = step_instructions[idx]
                else:
                    instruction = record['goal']
                user_messages = build_user_messages(
                    instruction, enable_think=True, history=step_instructions[:idx]
                )

                user_messages['content'].append({"image": record['images'][0]})
                messages = [system_messages, user_messages]
                record['messages'] = message_translate(messages, to_format='qwen')
                data.append(record)

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.save_path, exist_ok=True)
        self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_random_"+self.model_name.lower()+'.json'))
        logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")



    def _get_test_index(self):
        path = "/data3/cpz/datasets/GUIOdyssey/splits/random_split.json"
        split_json = self.readJson(path)['test']
        return split_json

        
        
    
    
if __name__ == '__main__':
    # Script entry point: parse the CLI options, then run the converter that
    # matches the requested model name.
    args = parse_args()
    logger.info(args)
    if args.dataset_name == 'GUIOdyssey':
        process = GUI_OdesssyPreProcess(
            args.dataset_type,
            args.dataset_path,
            args.dataset_name,
            args.save_path,
            args.model_name,
        )
        # Model name -> name of the converter method on ``process``. Method
        # names are resolved lazily so only the selected one is looked up.
        converter_names = {
            "OS_ATLAS": "OS_ATLAS",
            "UI_TARS": "UI_TARS",
            "UI_TARS_1.5": "UI_TARS",
            'GUI_R1': "GUI_R1",
            'Agent_CPM': "Agent_CPM",
            'OS_Genesis': "OS_Genesis",
            'Aguvis': "Aguvis",
            'GUI_Odyssey': "GUI_Odyssey",
            'GUI_OWL': "GUI_OWL",
        }
        method_name = converter_names.get(args.model_name)
        if method_name is None:
            logger.info("error processing")
        else:
            getattr(process, method_name)()
    